# R Options
options(stringsAsFactors=FALSE,
        "citation_format"="pandoc", 
        dplyr.summarise.inform=FALSE, 
        knitr.table.format="html",
        future.globals.maxSize=2000000000, mc.cores=4, 
        future.fork.enable=TRUE, future.plan="multicore",
        future.rng.onMisuse="ignore")

# Required libraries
library(Seurat) # main
library(ggplot2) # plots
library(patchwork) # combination of plots
library(magrittr) # %>% operator

# Other libraries we use
# Knit: knitr
# Data handling: dplyr, tidyr, purrr
# Tables: kableExtra
# Plots: ggsci

# Knitr default options
knitr::opts_chunk$set(echo=TRUE,                     # output code
                      cache=FALSE,                   # do not cache results
                      message=TRUE,                  # show messages
                      warning=TRUE,                  # show warnings
                      tidy=FALSE,                    # do not auto-tidy-up code
                      fig.width=10,                  # default fig width in inches
                      class.source="fold-hide",      # by default collapse code blocks
                      dev=c("png", "pdf"),           # create figures in png and pdf; the first device (png) will be used for HTML output
                      dev.args=list(png=list(type="cairo"),  # png: use cairo - works on cluster, supports anti-aliasing (more smooth)
                                    pdf=list(bg="white")),     # pdf: use cairo - works on cluster, supports anti-aliasing (more smooth)
                      dpi=96,                        # figure resolution (note: final = dpi*fig.retina; will not work with pandoc see below)
                      fig.retina=2                   # retina multiplier for HTML output (note: final = dpi*fig.retina)
                                     
)
# Path for figures in png and pdf format
knitr::opts_chunk$set(fig.path=paste(params$path_out, "figures/", sep="/"))

# When using pandoc, setting the dpi via opts_chunk does not work (https://github.com/yihui/knitr/issues/901)
# As workaround, we define a function that takes care of that
#knitr::opts_hooks$set(dpi = function(options) {options$dpi = 40;options})

# Git directory and files to source must be done first, then all helper functions can be sourced
git_files_to_source = c("R/functions_io.R",
              "R/functions_plotting.R",
              "R/functions_util.R")
git_files_to_source = paste(params$path_to_git, git_files_to_source, sep="/")
file_exists = purrr::map_lgl(git_files_to_source, file.exists)

if (any(!file_exists)) stop(paste("The following files could not be found:",paste(git_files_to_source[!file_exists], collapse=", "), ". Please check the git directory at '", params$path_to_git, "'.!"))
invisible(purrr::map(git_files_to_source, source))

# Debugging mode: 
# "default_debugging" for default, "terminal_debugger" for debugging without X11, "print_traceback" for non-interactive sessions 
invisible(switch (params$debugging_mode, 
        default_debugging=on_error_default_debugging(), 
        terminal_debugger=on_error_start_terminal_debugger(),
        print_traceback=on_error_just_print_traceback(),
        on_error_default_debugging()))

# Set output hooks
knitr::knit_hooks$set(message=format_message, warning=format_warning)

# Create output directories
if (!file.exists(params$path_out)) dir.create(params$path_out, recursive=TRUE, showWarnings=FALSE)

# Do checks
error_messages = c()
# Check installed packages
error_messages = c(error_messages, check_installed_packages())
# Check parameters TODO
#error_messages = c(error_messages, check_parameters(param))

Read input data

In this first section of the report, we begin by printing mapping statistics that have been produced prior to this workflow.

# Are statistics provided?
if (!is.null(params$stats)) {
  cat("\n## Stats {.tabset}\n\n")
  mapping_stats = read.delim(params$stats,sep=",", header=FALSE, check.names=FALSE) %>% t() %>% as.data.frame()
  colnames(mapping_stats) = c("V1","Dataset")
  rownames(mapping_stats) = mapping_stats$V1
  mapping_stats$V1 = NULL

  mapping_stats_general = mapping_stats[!grepl("^Antibody:",rownames(mapping_stats)),,drop=FALSE]
  mapping_stats_antibodies = mapping_stats[grepl("^Antibody:",rownames(mapping_stats)),,drop=FALSE]
  
  cat("\n### General\n\n")
  print(knitr::kable(mapping_stats_general, align="l", caption="General statistics") %>% 
  kableExtra::kable_styling(bootstrap_options=c("striped", "hover")))
 
  cat("\n### Antibodies\n\n")
  print(knitr::kable(mapping_stats_antibodies, align="l", caption="Antibody statistics") %>% 
  kableExtra::kable_styling(bootstrap_options=c("striped", "hover")))
} else { 
  message("Mapping statistics cannot be shown. No valid file provided.")
}

× (Message)
Mapping statistics cannot be shown. No valid file provided.

Dataset

Then, we read 10X data from the files produced by Cell Ranger:

  • barcodes.tsv.gz: All cell barcodes
  • features.tsv.gz: (Ensembl) ID, name, and type for each feature
  • matrix.mtx.gz: Counts for all features

and setup a Seurat object. This object includes different data types in separate assays:

  • Gene Expresssion in assay RNA
  • Antibody Capture in HTO and ADT,
  • CRISPR Guide Capture in Crispr, and
  • Custom in Custom.

Note that “Antibody Capture” features can correspond to “HTO” and “ADT” and are distinguished based on provided HTO names.

# Load the dataset with its assays and create a Seurat object; pass hto_names (or hto_regex if set) so that HTO will be a separate assay
sc = ReadSparseMatrix(params$path_data, project=params$project_id, row_name_column=1, hto_names=params$hto_names, hto_regex=params$hto_regex)

× (Message)
10X data contains more than one type and is being returned as a list containing matrices of each type.

sc = sc[[1]]

# If requested: sample at most n cells
if (!is.null(params$downsample_cells_n)) {
  sampled_barcodes = sample(Seurat::Cells(sc), min(params$downsample_cells_n, length(Seurat::Cells(sc))))
  sc = subset(sc, cells=sampled_barcodes)
}

# Remember original assay names
original_assay_names = setdiff(Seurat::Assays(sc), "HTO")

# Set colours
hto_samples = rownames(sc[["HTO"]])
hto_colours = list()
hto_colours$col_hto_global = ggsci::pal_npg()(3)
names(hto_colours$col_hto_global) = c("Negative", "Doublet", "Singlet")

hto_colours$col_hto_collapsed = ggsci::pal_npg()(length(hto_samples) + 2)
hto_levels = c("Negative", "Doublet", hto_samples[1:length(hto_samples)])
names(hto_colours$col_hto_collapsed) = hto_levels

HTO counts

Sometimes demultiplexing fails, because some HTOs generally do not perform well, or some cells end up with zero HTO counts. To assure the script can run through, we remove HTOs and cells with zero counts.

# Check for HTOs with 0 HTO counts, and remove them if necessary 
hto_counts = Seurat::GetAssayData(sc, assay="HTO", slot="counts") 
hto_counts_row = hto_counts %>% Matrix::rowSums() %>% as.data.frame()
colnames(hto_counts_row) = "HtoCounts"
if (any(hto_counts_row$HtoCounts == 0)) {
  sc[["HTO"]] = subset(sc[["HTO"]], features=rownames(which(hto_counts_row$HtoCounts > 0)))
  warning("Discarded ", sum(hto_counts_row$HtoCounts == 0), " HTOs with 0 HTO counts.")
}

# Check for cells with 0 HTO counts, and remove them if necessary 
hto_counts_col = hto_counts %>% Matrix::colSums() %>% summary() %>% as.matrix() %>% as.data.frame()
colnames(hto_counts_col) = "HtoCounts"
if (any(hto_counts_col$HtoCounts == 0)) {
  sc = subset(sc, cells=rownames(which(hto_counts_col[, "HtoCounts"] > 0)))
  warning("Discarded ", sum(hto_counts_col$HtoCounts == 0), " cells with 0 HTO counts.")
}

# Check for HTOs how many cells they have at least 1 count
hto_counts_row$CellsWithAtLeast1HtoCount = sapply(1:nrow(hto_counts), function(x) sum(hto_counts[x,] > 0))

knitr::kable(hto_counts_row, align="l", caption="HTO counts per HTO") %>% 
  kableExtra::kable_styling(bootstrap_options=c("striped", "hover"))
HTO counts per HTO
HtoCounts CellsWithAtLeast1HtoCount
htoA 3230151 15504
htoB 4841536 16072
htoC 3076823 16379
htoD 5529800 16644
htoE 3514723 16436
htoF 3740005 16686
htoG 4295668 16885
htoH 3895618 16531
knitr::kable(hto_counts_col, align="l", caption="HTO counts per cell (summary)") %>% 
  kableExtra::kable_styling(bootstrap_options=c("striped", "hover"))
HTO counts per cell (summary)
HtoCounts
Min. 4.00
1st Qu. 823.75
Median 1347.50
Mean 1899.05
3rd Qu. 2405.25
Max. 152794.00

Demultiplexing with HTOs

This section of the report shows how cells are assigned to their sample-of-origin.

Normalisation of HTO counts

We start the analysis by normalising raw HTO counts. If LogNormalize was chosen for normalisation, HTO counts for each cell are divided by the total counts for that cell, multiplied by 10,000 and then natural-log transformed. If CLR was chosen for normalisation, HTO counts for each cell are divided by the geometric mean of the counts for that cell and then natural-log transformed.

# HTO
sc = suppressWarnings(Seurat::NormalizeData(sc, assay="HTO", normalization.method=params$norm, verbose=FALSE))

Classification of cells based on normalised HTO data

We assign cells to their sample-of-origin, annotate negative cells that cannot be assigned to any sample, and doublet cells that are assigned to two samples.

# Demultiplex HTOs
sc = Seurat::HTODemux(sc, assay="HTO", positive.quantile=0.95, verbose=FALSE)

# Sort Idents levels for nicer plotting
Seurat::Idents(sc) = factor(Seurat::Idents(sc), levels=hto_levels)
sc$hash.ID = factor(sc$hash.ID, levels=hto_levels)
sc$HTO_classification.global = factor(sc$HTO_classification.global, levels=c("Negative", "Doublet", "Singlet"))

# HTO classification results
hash_ID_table = sc[["hash.ID"]] %>% 
  dplyr::count(hash.ID) %>% 
  dplyr::rename(HTO=hash.ID) %>% 
  dplyr::mutate(Perc=round(n/sum(n)*100,2)) %>% 
  as.data.frame

p1 = ggplot(sc[["HTO_classification.global"]] %>% dplyr::count(HTO_classification.global), 
            aes(x="", y=n, fill=HTO_classification.global)) + 
  geom_bar(width=1, stat="identity") + 
  coord_polar("y", start=0, direction=-1) + 
  AddStyle(title="HTO global classification results", 
           fill=hto_colours$col_hto_global, 
           xlab="", ylab="")

p2 = ggplot(sc[["hash.ID"]] %>% dplyr::count(hash.ID), 
            aes(x="", y=n, fill=hash.ID)) + 
  geom_bar(width=1, stat="identity") + 
  coord_polar("y", start=0, direction=-1) + 
  AddStyle(title="HTO classification results", 
           fill=hto_colours$col_hto_collapsed,
           xlab="", ylab="")

p = p1 + p2 + 
  patchwork::plot_annotation(title="HTO classification results")
p

knitr::kable(hash_ID_table, align="l", caption="HTO classification results") %>% 
        kableExtra::kable_styling(bootstrap_options=c("striped", "hover"))
HTO classification results
HTO n Perc
Negative 145 0.86
Doublet 3349 19.80
htoA 1843 10.90
htoB 1899 11.23
htoC 1775 10.49
htoD 1632 9.65
htoE 1405 8.31
htoF 1442 8.52
htoG 1663 9.83
htoH 1763 10.42

Visualisation of HTO data

This section of the report visualises raw and normalised HTO data to understand whether the demultiplexing step has worked well.

# Distribution of HTO counts before and after normalisation
hto_t_raw = Seurat::GetAssayData(sc, assay="HTO", slot="counts") %>% 
  as.data.frame %>% t %>% as.data.frame
hto_t_raw_pseudo = hto_t_raw + 1
hto_t_norm = Seurat::GetAssayData(sc, assay="HTO", slot="data") %>% 
  as.data.frame %>% t %>% as.data.frame

p1 = ggplot(hto_t_raw_pseudo %>% tidyr::pivot_longer(tidyr::everything()), 
            aes(x=name, y=value, fill=name, group=name)) + 
  geom_violin() + 
  scale_y_continuous(trans="log2") + 
  AddStyle(title="HTO raw counts", 
           fill=hto_colours$col_hto_collapsed, 
           legend_position="none", 
           xlab="", ylab="") +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1))
  
p2 = ggplot(hto_t_norm %>% tidyr::pivot_longer(tidyr::everything()), 
            aes(x=name, y=value, fill=name, group=name)) + 
  geom_violin() + 
  AddStyle(title="HTO normalised counts", 
           fill=hto_colours$col_hto_collapsed, 
           legend_position="none",
           xlab="", ylab="") + 
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1))

p = p1 + p2
p = p + patchwork::plot_annotation("HTO counts before and after normalisation")
p

Pairs of raw (top) and normalised (bottom) HTO counts are visualised to confirm mutal exclusivity in singlet cells. Data points correspond to measured HTO counts per HTO, colours correspond to the assigned samples-of-origin.

n = DfAllColumnCombinations(x=hto_t_raw_pseudo, cell_classification=sc$hash.ID)

# Plot
p = ggplot(n, aes(x=value1, y=value2, color=cell_classification)) + 
  geom_point() + 
  scale_x_continuous(trans="log2") + scale_y_continuous(trans="log2") + 
  AddStyle(col=hto_colours$col_hto_collapsed)

p = p + facet_grid(name2~name1, drop=FALSE) + 
  theme(axis.title.x=element_blank(), strip.text.x=element_text(size=10, color="black"),
        axis.title.y=element_blank(), strip.text.y=element_text(size=10, color="black"),
        strip.background = element_rect(colour="white", fill="lightgrey"),
        legend.position="bottom", 
        axis.text.x=element_text(angle=45, hjust=1, vjust=0.5)) + 
  patchwork::plot_annotation("Raw HTO counts")
p

n = DfAllColumnCombinations(x=hto_t_norm, cell_classification=sc$hash.ID)
n = n[n$value1 > 0 & n$value2 > 0, ]
p = ggplot(n, aes(x=value1, y=value2, color=cell_classification)) + 
  geom_point() + 
  scale_x_continuous(trans="log2") + scale_y_continuous(trans="log2") + 
  AddStyle(col=hto_colours$col_hto_collapsed)

p = p + facet_grid(name2~name1, drop=FALSE) + 
  theme(axis.title.x=element_blank(), strip.text.x=element_text(size=10, color="black"),
        axis.title.y=element_blank(), strip.text.y=element_text(size=10, color="black"),
        strip.background = element_rect(colour="white", fill="lightgrey"),
        legend.position="bottom") + 
  patchwork::plot_annotation("Normalised HTO data")
suppressMessages(p)

The following ridge plots visualise the enrichment of assigned sample-of-origin for the respective normalised HTO counts.

# Group cells based on HTO classification 
p_list = RidgePlot(sc, assay="HTO", features=rownames(Seurat::GetAssay(sc, assay="HTO")), 
                   same.y.lims=TRUE, cols=hto_colours$col_hto_collapsed, combine=FALSE)
for (i in seq(p_list)) p_list[[i]] = p_list[[i]] + AddStyle(legend_position="none")
p = patchwork::wrap_plots(p_list, ncol = 2) + 
  patchwork::plot_annotation("Normalised HTO data")
suppressMessages(p)

Lastly, we compare the counts and number of features between classified cells.

# Number of counts and features in the different cells
nfeature_metrics = grep("_HTO", 
                        grep("nFeature_", colnames(sc[[]]), v=TRUE),
                        v=TRUE,
                        invert=TRUE)
ncounts_metrics = grep("_HTO", 
                        grep("nCount_", colnames(sc[[]]), v=TRUE),
                        v=TRUE,
                        invert=TRUE)

p1 = VlnPlot(sc, features=ncounts_metrics, idents=levels(Seurat::Idents(sc)), pt.size=0) + 
  geom_violin(color=NA) + 
  AddStyle(title="Counts (RNA)", 
           fill=hto_colours$col_hto_collapsed, 
           legend_position="none", 
           xlab="") +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) 

p2 = VlnPlot(sc, features=nfeature_metrics, idents=levels(Seurat::Idents(sc)), pt.size=0) + 
  geom_violin(color=NA) +
  AddStyle(title="Features (RNA)", 
           fill=hto_colours$col_hto_collapsed, 
           legend_position="none", 
           xlab="") +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) 

p = patchwork::wrap_plots(list(p1, p2), ncol=2)
p = p + patchwork::plot_annotation("Number of counts and features for HTO-classified cells")
p

Keeping singlets

This section of the report states the number of cells that remain after negative and doublet cells are removed.

sc.all = sc
sc = subset(sc, idents=c("Negative", "Doublet"), invert=TRUE)
sc
## An object of class Seurat 
## 18242 features across 13422 samples within 2 assays 
## Active assay: RNA (18234 features, 0 variable features)
##  1 other assay present: HTO

Preliminary visualisation of RNA data

This section of the report provides first insights into your RNA dataset based on a preliminary pre-processing of the RNA data using the standard scRNA-seq workflow.

# Normalise, scale, pca umap and clustering for sc
sc = suppressWarnings(Seurat::NormalizeData(sc, assay="RNA", normalization.method = "LogNormalize", scale.factor=10000, verbose=FALSE))
sc = Seurat::FindVariableFeatures(sc, assay="RNA", selection.method="vst", verbose=FALSE)
sc = Seurat::ScaleData(sc, assay="RNA", features=Seurat::VariableFeatures(sc, assay="RNA"), verbose=FALSE)
sc = Seurat::RunPCA(sc, assay="RNA", features=VariableFeatures(object=sc, assay="RNA"), verbose=FALSE, npcs=min(50, ncol(sc)))
sc = suppressWarnings(Seurat::RunUMAP(sc, dims=1:10, verbose=FALSE, umap.method="uwot"))
sc = Seurat::FindNeighbors(sc, dims=1:10, verbose=FALSE)
sc = Seurat::FindClusters(sc, algorithm=1, verbose=FALSE, method="igraph")
Idents(sc) = "hash.ID"

# Normalise, scale, pca umap and clustering for sc.all
sc.all = suppressWarnings(Seurat::NormalizeData(sc.all, assay="RNA", normalization.method = "LogNormalize", scale.factor=10000, verbose=FALSE))
sc.all = Seurat::FindVariableFeatures(sc.all, assay="RNA", selection.method="vst", verbose=FALSE)
sc.all = Seurat::ScaleData(sc.all, assay="RNA", features=Seurat::VariableFeatures(sc.all, assay="RNA"), verbose=FALSE)
sc.all = Seurat::RunPCA(sc.all, assay="RNA", features=VariableFeatures(object=sc.all, assay="RNA"), verbose=FALSE, npcs=min(50, ncol(sc.all)))
sc.all = suppressWarnings(Seurat::RunUMAP(sc.all, dims=1:10, verbose=FALSE, umap.method="uwot"))
sc.all = Seurat::FindNeighbors(sc.all, dims=1:10, verbose=FALSE)
sc.all = Seurat::FindClusters(sc.all, algorithm=1, verbose=FALSE, method="igraph")
Idents(sc.all) = "hash.ID"

# Set figure height
sc_fig_height = round((length(hto_samples))*1.25)
sc_qc_fig_height = round((length(levels(sc$seurat_clusters)))*0.75)
sc_all_fig_height = round((length(hto_samples)+2)*1.25)
sc_all_qc_fig_height = round((length(levels(sc.all$seurat_clusters)))*0.75)

# Set cluster colors
cluster_names = levels(sc$seurat_clusters)
sc_col_clusters = GenerateColours(num_colours=length(cluster_names), palette="ggsci::pal_d3", palette_options = list(palette="category20"))
names(sc_col_clusters) = cluster_names

cluster_names = levels(sc.all$seurat_clusters)
sc_all_col_clusters = GenerateColours(num_colours=length(cluster_names), palette="ggsci::pal_d3", palette_options = list(palette="category20"))
names(sc_all_col_clusters) = cluster_names

Visualisation with UMAP

We use a UMAP to visualise and explore a dataset. The goal is to place similar cells together in 2D space, and learn about the biology underlying the data. Cells are color-coded according to the assigned sample-of-origin.

Take care not to mis-read a UMAP:

  • Parameters influence the plot (we use defaults here)
  • Cluster sizes relative to each other mean nothing, since the method has a local notion of distance
  • Distances between clusters might not mean anything
  • You may need more than one plot

For a nice read to intuitively understand UMAP, see https://pair-code.github.io/understanding-umap/.

Before filtering

p = Seurat::DimPlot(sc.all, reduction="umap", group.by="hash.ID", cols=hto_colours$col_hto_collapsed) + 
  AddStyle(title="UMAP, cells coloured by HTO classification, including doublets and negatives", 
           legend_title="Cell classification", 
           legend_position="bottom")
p

  • Note: HTO colors are the same before and after filtering

After filtering

# Plot UMAP after HTO filtering
p = Seurat::DimPlot(sc, reduction="umap", group.by="hash.ID", cols=hto_colours$col_hto_collapsed) + 
  AddStyle(title="UMAP, cells coloured by HTO classification, singlets only", 
           legend_title="Cell classification", 
           legend_position="bottom")
p

  • Note: HTO colors are the same before and after filtering

Before filtering (each hto)

p = Seurat::DimPlot(sc.all, reduction="umap", group.by="seurat_clusters", split.by="hash.ID", ncol=2, cols=sc_all_col_clusters) + 
  AddStyle(title="UMAP for each HTO, including doublets and negatives", 
           legend_title="Cluster", 
           legend_position="right")
p

  • Important: Cluster colors are not neccessarily the same before and after filtering!

After filtering (each hto)

p = Seurat::DimPlot(sc, reduction="umap", group.by="seurat_clusters", split.by="hash.ID", ncol=2, cols=sc_col_clusters) + 
  AddStyle(title="UMAP for each HTO, singlets only", 
           legend_title="Cluster", 
           legend_position="right")
p

  • Important: Cluster colors are not neccessarily the same before and after filtering!

Distribution of cells in HTO and in clusters

Next, we examine how the HTOs are distributed in the individual clusters and how the clusters are distributed in each individual HTO. We include negatives and doublets as they often group into specific clusters.

p = ggplot(sc.all[[c("seurat_clusters","hash.ID")]], aes(x=seurat_clusters, fill=hash.ID)) +
  geom_bar(position="fill", colour=NA) +
  scale_x_discrete("Cluster") +
  scale_y_continuous("Fraction of cells") +
  scale_fill_manual(values=hto_colours$col_hto_collapsed) +
  AddStyle(title="HTO cells in cluster", 
           legend_title="Cell classification", 
           legend_position="right")  
p

p = ggplot(sc.all[[c("seurat_clusters","hash.ID")]], aes(x=hash.ID, fill=seurat_clusters)) +
  geom_bar(position="fill", colour=NA) +
  scale_x_discrete("Cluster") +
  scale_y_continuous("Fraction of cells") +
  scale_fill_manual(values=sc_all_col_clusters) +
  AddStyle(title="Clusters in individual HTO", 
           legend_title="Cluster", 
           legend_position="right")  +
  theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) 
p

Here it also helps to inspect RNA counts and features per cluster and hto.

ncounts_metrics = grep("_HTO", 
                        grep("nCount_", colnames(sc.all[[]]), v=TRUE),
                        v=TRUE,
                        invert=TRUE)

p = ggplot(sc.all[[c("hash.ID", "seurat_clusters", ncounts_metrics[1])]], aes_string(x="hash.ID", y=ncounts_metrics[1], fill="hash.ID", group="hash.ID")) +
  geom_violin() +
  xlim(levels(sc.all$hash.ID)) +
  scale_y_continuous("Counts (RNA)") +
  facet_wrap(~seurat_clusters, ncol=3, scales="free_y") +
  #scale_fill_manual(values=hto_colours$col_hto_collapsed) +
  AddStyle(fill=hto_colours$col_hto_collapsed, 
           legend_title="Classified cells",
           legend_position="bottom", 
           xlab="") +
  theme(axis.text.x = element_text(angle=45, hjust=1), legend.position="none")
p = p + patchwork::plot_annotation("Number of counts for each cluster and HTO")
p

nfeature_metrics = grep("_HTO", 
                        grep("nFeature_", colnames(sc.all[[]]), v=TRUE),
                        v=TRUE,
                        invert=TRUE)


p = ggplot(sc.all[[c("hash.ID", "seurat_clusters", nfeature_metrics[1])]], aes_string(x="hash.ID", y=nfeature_metrics[1], fill="hash.ID", group="hash.ID")) +
  geom_violin() +
  xlim(levels(sc.all$hash.ID)) +
  facet_wrap(~seurat_clusters, ncol=3, scales="free_y") +
  scale_y_continuous("Features (RNA)") +
  #scale_fill_manual(values=hto_colours$col_hto_collapsed) +
  AddStyle(fill=hto_colours$col_hto_collapsed, 
           legend_title="Classified cells",
           legend_position="bottom", 
           xlab="") +
  theme(axis.text.x = element_text(angle=45, hjust=1), legend.position="none")
p = p + patchwork::plot_annotation("Number of features for each cluster and HTO")
p

Write out demultiplexed data

Finally, demultiplexed RNA data are written back to file. Results are included as metadata which may help for further analysis (e.g. filtering).

# Save each sample in a separate directory
Idents(sc) = "hash.ID"
sc$HTO_hash_ID = sc$hash.ID
samples = levels(Seurat::Idents(sc))
demux_samples_paths = c()
for (s in samples) {
  p = ExportSeuratAssayData(sc[,Seurat::Idents(sc)==s], 
                            dir=file.path(params$path_out, s), 
                            assays=original_assay_names, 
                            slot="counts",
                            include_cell_metadata_cols=c("HTO_classification", 
                                                         "HTO_hash_ID"))
  demux_samples_paths = c(demux_samples_paths, p)
}
message("Demultiplexed datasets are: ", paste(demux_samples_paths, collapse=", "))

× (Message)
Demultiplexed datasets are: test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/htoA, test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/htoB, test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/htoC, test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/htoD, test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/htoE, test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/htoF, test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/htoG, test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/htoH

# Export cell classification for Loupe
demux_samples_paths = c()
sc.all$HTO_hash_ID = sc.all$hash.ID
loupe_meta = as.data.frame(sc.all@meta.data)
loupe_meta = loupe_meta[,c("HTO_classification", "HTO_hash_ID")]
idx_keep = sapply(1:ncol(loupe_meta), function(x) !is.numeric(loupe_meta[,x]))
loupe_meta = cbind(Barcode=rownames(loupe_meta), loupe_meta[, idx_keep])
p = file.path(params$path_out, "cell_classification_for_loupe.csv")
write.table(x=loupe_meta, file=p, col.names=TRUE, row.names=FALSE, quote=FALSE, sep=",")
demux_samples_paths = c(demux_samples_paths, p)

message(paste("Classification file for Loupe is:", demux_samples_paths))

× (Message)
Classification file for Loupe is: test_datasets/10x_pbmc_hto_GSE108313/demultiplexed/cell_classification_for_loupe.csv

Credits

This Pre-Workflow was developed as part of the scrnaseq GitHub repository by Katrin Sameith and Andreas Petzold at the Dresden-concept Genome Center, TU Dresden (Dresden, Germany), in collaboration with Torsten Glomb and Oliver Dittrich at the Research Core Unit Genomics, Hannover Medical School (Hannover, Germany). The Seurat Vignettes were initially used as templates.

Parameter table

The following parameters were used to run the workflow.

out = ScrnaseqParamsInfo(params=params)

knitr::kable(out, align="l") %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"), full_width=FALSE, position="left")
Name Value
author katrin
project_id HTO_testDataset
path_data test_datasets/10x_pbmc_hto_GSE108313/counts
stats
path_out test_datasets/10x_pbmc_hto_GSE108313/demultiplexed
hto_names htoA=htoA, htoB=htoB, htoC=htoC, htoD=htoD, htoE=htoE, htoF=htoF, htoG=htoG, htoH=htoH
hto_regex
norm CLR
mt_names ^MT-
col palevioletred
downsample_cells_n
path_to_git .
debugging_mode default_debugging

Software versions

This report was generated using the scrnaseq GitHub repository. Software versions were collected at run time.

out = ScrnaseqSessionInfo(params$path_to_git)

× (Message)
Registered S3 method overwritten by ‘cli’:
method from

print.boxx spatstat.geom

knitr::kable(out, align="l") %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"), full_width=FALSE, position="left")
Name Version
ktrns/scrnaseq ef4244baed834f75fbb952c75a2e88a4953bf7ce
R R version 4.0.5 (2021-03-31)
Platform x86_64-apple-darwin17.0 (64-bit)
Operating system macOS Big Sur 10.16
Packages abind1.4-5, assertthat0.2.1, bslib0.2.4, cli2.4.0, cluster2.1.1, codetools0.2-18, colorspace2.0-0, cowplot1.1.1, crayon1.4.1, data.table1.14.0, DBI1.1.1, deldir0.2-10, digest0.6.27, dplyr1.0.5, ellipsis0.3.1, evaluate0.14, fansi0.4.2, farver2.1.0, fastmap1.1.0, fitdistrplus1.1-3, future1.21.0, future.apply1.7.0, generics0.1.0, ggplot23.3.3, ggrepel0.9.1, ggridges0.5.3, ggsci2.9, globals0.14.0, glue1.4.2, goftest1.2-2, gridExtra2.3, gtable0.3.0, highr0.8, htmltools0.5.1.1, htmlwidgets1.5.3, httpuv1.5.5, httr1.4.2, ica1.0-2, igraph1.2.6, irlba2.3.3, jquerylib0.1.3, jsonlite1.7.2, kableExtra1.3.4, KernSmooth2.23-18, knitr1.31, labeling0.4.2, later1.1.0.1, lattice0.20-41, lazyeval0.2.2, leiden0.3.7, lifecycle1.0.0, listenv0.8.0, lmtest0.9-38, magrittr2.0.1, MASS7.3-53.1, Matrix1.3-2, matrixStats0.58.0, mgcv1.8-34, mime0.10, miniUI0.1.1.1, munsell0.5.0, nlme3.1-152, parallelly1.24.0, patchwork1.1.1, pbapply1.4-3, pillar1.6.0, pkgconfig2.0.3, plotly4.9.3, plyr1.8.6, png0.1-7, polyclip1.10-0, promises1.2.0.1, purrr0.3.4, R.methodsS31.8.1, R.oo1.24.0, R.utils2.10.1, R62.5.0, RANN2.6.1, RColorBrewer1.1-2, Rcpp1.0.6, RcppAnnoy0.0.18, reshape21.4.4, reticulate1.18, rlang0.4.10, rmarkdown2.7, ROCR1.0-11, rpart4.1-15, RSpectra0.16-0, rstudioapi0.13, Rtsne0.15, rvest1.0.0, sass0.3.1, scales1.1.1, scattermore0.7, sctransform0.3.2, sessioninfo1.1.1, Seurat4.0.1, SeuratObject4.0.0, shiny1.6.0, spatstat.core2.0-0, spatstat.data2.1-0, spatstat.geom2.0-1, spatstat.sparse2.0-0, spatstat.utils2.1-0, stringi1.5.3, stringr1.4.0, survival3.2-10, svglite2.0.0, systemfonts1.0.1, tensor1.5, tibble3.1.0, tidyr1.1.3, tidyselect1.1.0, utf81.2.1, uwot0.1.10, vctrs0.3.7, viridisLite0.4.0, webshot0.5.2, withr2.4.1, xfun0.22, xml21.3.2, xtable1.8-4, yaml2.2.1, zoo1.8-9